# librerías necesarias para implementar las funciones
library(readxl)
library(glue)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(ggmosaic)
library(ggridges)
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ data.table::between() masks dplyr::between()
## ✖ dplyr::filter() masks stats::filter()
## ✖ data.table::first() masks dplyr::first()
## ✖ lubridate::hour() masks data.table::hour()
## ✖ lubridate::isoweek() masks data.table::isoweek()
## ✖ dplyr::lag() masks stats::lag()
## ✖ data.table::last() masks dplyr::last()
## ✖ lubridate::mday() masks data.table::mday()
## ✖ lubridate::minute() masks data.table::minute()
## ✖ lubridate::month() masks data.table::month()
## ✖ lubridate::quarter() masks data.table::quarter()
## ✖ lubridate::second() masks data.table::second()
## ✖ purrr::transpose() masks data.table::transpose()
## ✖ lubridate::wday() masks data.table::wday()
## ✖ lubridate::week() masks data.table::week()
## ✖ lubridate::yday() masks data.table::yday()
## ✖ lubridate::year() masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(pastecs)
##
## Attaching package: 'pastecs'
##
## The following object is masked from 'package:tidyr':
##
## extract
##
## The following objects are masked from 'package:data.table':
##
## first, last
##
## The following objects are masked from 'package:dplyr':
##
## first, last
library(xtable)
library(here)
## here() starts at /Users/sofiabocker/Desktop/universidad/UCR/Actuariales/Cuarto año/I Ciclo/Estadística Actuarial I/Proyecto/cod
library(skimr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(rcompanion)
library(RCurl)
##
## Attaching package: 'RCurl'
##
## The following object is masked from 'package:tidyr':
##
## complete
# importar base de datos
# Link de la base de datos en github
url <- "https://raw.githubusercontent.com/sofiabocker/proyecto_ca_0303_g08/main/base_datos_alcohol.xlsx"
# descargar el archivo
binary_data <- getBinaryURL(url)
temp_file <- tempfile(fileext = ".xlsx")
writeBin(binary_data, temp_file)
# leer el archivo en R
base_datos <- read_excel(temp_file)
## New names:
## • `` -> `...32`
## • `` -> `...33`
base_datos <- base_datos [, -32]
base_datos <- base_datos [, -32]
base_datos <- head(base_datos, -25)
# Comprimir las variables de 5 categorías en variables de tres categorías <
base_datos_clean <- base_datos %>%
clean_names() %>%
mutate(alcohol_weekdays = fct_collapse(
alcohol_weekdays,
Low = c("Low", "Very Low"),
High = c("High", "Very High"),
Moderate = "Moderate"
))
# Asegurarse que los datos se mantengan como characters
base_datos_clean$alcohol_weekdays <- as.character(base_datos_clean$alcohol_weekdays)
base_datos_clean <- base_datos_clean %>%
clean_names() %>%
mutate(alcohol_weekends = fct_collapse(
alcohol_weekends,
Low = c("Low", "Very Low"),
High = c("High", "Very High"),
Moderate = "Moderate"
))
base_datos_clean$alcohol_weekends <- as.character(base_datos_clean$alcohol_weekends)
base_datos_clean <- base_datos_clean %>%
clean_names() %>%
mutate(health_status = fct_collapse(
health_status ,
Poor = c("Poor", "Very Poor"),
Good = c("Very Good", "Good"),
Fair = "Fair"
))
base_datos_clean$health_status <- as.character(base_datos_clean$health_status)
base_datos_clean <- base_datos_clean %>%
clean_names() %>%
mutate(good_family_relationship = fct_collapse(
good_family_relationship,
Poor = c("Poor", "Very Poor"),
Good = c("Excellent", "Good"),
Fair = "Fair"
))
base_datos_clean$good_family_relationship <- as.character(base_datos_clean$good_family_relationship)
base_datos_clean <- base_datos_clean %>%
clean_names() %>%
mutate(free_time_after_school = fct_collapse(
free_time_after_school,
Low = c("Low", "Very Low"),
High = c("High", "Very High"),
Moderate = "Moderate"
))
base_datos_clean$free_time_after_school <- as.character(base_datos_clean$free_time_after_school)
base_datos_clean <- base_datos_clean %>%
clean_names() %>%
mutate(time_with_friends = fct_collapse(
time_with_friends,
Low = c("Low", "Very Low"),
High = c("High", "Very High"),
Moderate = "Moderate"
))
base_datos_clean$time_with_friends <- as.character(base_datos_clean$time_with_friends)
# muestra la estructura de los datos
str <- str(base_datos_clean)
## tibble [649 × 31] (S3: tbl_df/tbl/data.frame)
## $ school : chr [1:649] "Gabriel Pereira" "Gabriel Pereira" "Gabriel Pereira" "Gabriel Pereira" ...
## $ gender : chr [1:649] "Female" "Female" "Female" "Female" ...
## $ age : num [1:649] 18 17 15 15 16 16 16 17 15 15 ...
## $ housing_type : chr [1:649] "Urban" "Urban" "Urban" "Urban" ...
## $ family_size : chr [1:649] "Above 3" "Above 3" "Up to 3" "Above 3" ...
## $ parental_status : chr [1:649] "Separated" "Living Together" "Living Together" "Living Together" ...
## $ mother_education : chr [1:649] "Higher Education" "Primary School" "Primary School" "Higher Education" ...
## $ father_education : chr [1:649] "Higher Education" "Primary School" "Primary School" "Lower Secondary School" ...
## $ mother_work : chr [1:649] "Homemaker" "Homemaker" "Homemaker" "Health" ...
## $ father_work : chr [1:649] "Teacher" "other" "other" "Services" ...
## $ reason_school_choice : chr [1:649] "Course Preference" "Course Preference" "Other" "Near Home" ...
## $ legal_responsibility : chr [1:649] "Mother" "Father" "Mother" "Mother" ...
## $ commute_time : chr [1:649] "15 to 30 min" "Up to 15 min" "Up to 15 min" "Up to 15 min" ...
## $ weekly_study_time : chr [1:649] "2 to 5h" "2 to 5h" "2 to 5h" "5 to 10h" ...
## $ extra_educational_support : chr [1:649] "Yes" "No" "Yes" "No" ...
## $ parental_educational_support: chr [1:649] "No" "Yes" "No" "Yes" ...
## $ private_tutoring : chr [1:649] "No" "No" "No" "No" ...
## $ extracurricular_activities : chr [1:649] "No" "No" "No" "Yes" ...
## $ attended_daycare : chr [1:649] "Yes" "No" "Yes" "Yes" ...
## $ desire_graduate_education : chr [1:649] "Yes" "Yes" "Yes" "Yes" ...
## $ has_internet : chr [1:649] "No" "Yes" "Yes" "Yes" ...
## $ is_dating : chr [1:649] "No" "No" "No" "Yes" ...
## $ good_family_relationship : chr [1:649] "Good" "Good" "Good" "Fair" ...
## $ free_time_after_school : chr [1:649] "Moderate" "Moderate" "Moderate" "Low" ...
## $ time_with_friends : chr [1:649] "High" "Moderate" "Low" "Low" ...
## $ alcohol_weekdays : chr [1:649] "Low" "Low" "Low" "Low" ...
## $ alcohol_weekends : chr [1:649] "Low" "Low" "Moderate" "Low" ...
## $ health_status : chr [1:649] "Fair" "Fair" "Fair" "Good" ...
## $ school_absence : num [1:649] 4 2 6 0 0 6 0 2 0 0 ...
## $ grade_1st_semester : num [1:649] 0 9 12 14 11 12 13 10 15 12 ...
## $ grade_2nd_semester : num [1:649] 11 11 13 14 13 12 12 13 16 12 ...
# resumen general de la base de datos
summary(base_datos_clean)
## school gender age housing_type
## Length:649 Length:649 Min. :15.00 Length:649
## Class :character Class :character 1st Qu.:16.00 Class :character
## Mode :character Mode :character Median :17.00 Mode :character
## Mean :16.74
## 3rd Qu.:18.00
## Max. :22.00
## family_size parental_status mother_education father_education
## Length:649 Length:649 Length:649 Length:649
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## mother_work father_work reason_school_choice
## Length:649 Length:649 Length:649
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## legal_responsibility commute_time weekly_study_time
## Length:649 Length:649 Length:649
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## extra_educational_support parental_educational_support private_tutoring
## Length:649 Length:649 Length:649
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## extracurricular_activities attended_daycare desire_graduate_education
## Length:649 Length:649 Length:649
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## has_internet is_dating good_family_relationship
## Length:649 Length:649 Length:649
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## free_time_after_school time_with_friends alcohol_weekdays
## Length:649 Length:649 Length:649
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## alcohol_weekends health_status school_absence grade_1st_semester
## Length:649 Length:649 Min. : 0.000 Min. : 0.0
## Class :character Class :character 1st Qu.: 0.000 1st Qu.:10.0
## Mode :character Mode :character Median : 2.000 Median :11.0
## Mean : 3.659 Mean :11.4
## 3rd Qu.: 6.000 3rd Qu.:13.0
## Max. :32.000 Max. :19.0
## grade_2nd_semester
## Min. : 0.00
## 1st Qu.:10.00
## Median :11.00
## Mean :11.57
## 3rd Qu.:13.00
## Max. :19.00
# explora data
skimr::skim(base_datos_clean)
| Name | base_datos_clean |
| Number of rows | 649 |
| Number of columns | 31 |
| _______________________ | |
| Column type frequency: | |
| character | 27 |
| numeric | 4 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| school | 0 | 1 | 15 | 20 | 0 | 2 | 0 |
| gender | 0 | 1 | 4 | 6 | 0 | 2 | 0 |
| housing_type | 0 | 1 | 5 | 5 | 0 | 2 | 0 |
| family_size | 0 | 1 | 7 | 7 | 0 | 2 | 0 |
| parental_status | 0 | 1 | 9 | 15 | 0 | 2 | 0 |
| mother_education | 0 | 1 | 4 | 22 | 0 | 5 | 0 |
| father_education | 0 | 1 | 4 | 22 | 0 | 5 | 0 |
| mother_work | 0 | 1 | 5 | 9 | 0 | 5 | 0 |
| father_work | 0 | 1 | 5 | 9 | 0 | 5 | 0 |
| reason_school_choice | 0 | 1 | 5 | 17 | 0 | 4 | 0 |
| legal_responsibility | 0 | 1 | 5 | 6 | 0 | 3 | 0 |
| commute_time | 0 | 1 | 12 | 12 | 0 | 4 | 0 |
| weekly_study_time | 0 | 1 | 7 | 13 | 0 | 4 | 0 |
| extra_educational_support | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| parental_educational_support | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| private_tutoring | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| extracurricular_activities | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| attended_daycare | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| desire_graduate_education | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| has_internet | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| is_dating | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| good_family_relationship | 0 | 1 | 4 | 4 | 0 | 3 | 0 |
| free_time_after_school | 0 | 1 | 3 | 8 | 0 | 3 | 0 |
| time_with_friends | 0 | 1 | 3 | 8 | 0 | 3 | 0 |
| alcohol_weekdays | 0 | 1 | 3 | 8 | 0 | 3 | 0 |
| alcohol_weekends | 0 | 1 | 3 | 8 | 0 | 3 | 0 |
| health_status | 0 | 1 | 4 | 4 | 0 | 3 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| age | 0 | 1 | 16.74 | 1.22 | 15 | 16 | 17 | 18 | 22 | ▇▅▅▁▁ |
| school_absence | 0 | 1 | 3.66 | 4.64 | 0 | 0 | 2 | 6 | 32 | ▇▂▁▁▁ |
| grade_1st_semester | 0 | 1 | 11.40 | 2.75 | 0 | 10 | 11 | 13 | 19 | ▁▂▇▇▁ |
| grade_2nd_semester | 0 | 1 | 11.57 | 2.91 | 0 | 10 | 11 | 13 | 19 | ▁▁▇▇▂ |
# crear un dataframe con sólo las columnas con valores numéricos
base_datos_num <- base_datos_clean %>% select_if(is.numeric)
base_datos_num
## # A tibble: 649 × 4
## age school_absence grade_1st_semester grade_2nd_semester
## <dbl> <dbl> <dbl> <dbl>
## 1 18 4 0 11
## 2 17 2 9 11
## 3 15 6 12 13
## 4 15 0 14 14
## 5 16 0 11 13
## 6 16 6 12 12
## 7 16 0 13 12
## 8 17 2 10 13
## 9 15 0 15 16
## 10 15 0 12 12
## # ℹ 639 more rows
# brinda estadísticas más específicas
estadisticas <- stat.desc(base_datos_num)
estadisticas
## age school_absence grade_1st_semester grade_2nd_semester
## nbr.val 6.490000e+02 649.0000000 649.0000000 649.0000000
## nbr.null 0.000000e+00 244.0000000 1.0000000 7.0000000
## nbr.na 0.000000e+00 0.0000000 0.0000000 0.0000000
## min 1.500000e+01 0.0000000 0.0000000 0.0000000
## max 2.200000e+01 32.0000000 19.0000000 19.0000000
## range 7.000000e+00 32.0000000 19.0000000 19.0000000
## sum 1.086700e+04 2375.0000000 7398.0000000 7509.0000000
## median 1.700000e+01 2.0000000 11.0000000 11.0000000
## mean 1.674422e+01 3.6594761 11.3990755 11.5701079
## SE.mean 4.781608e-02 0.1821657 0.1077611 0.1143703
## CI.mean.0.95 9.389318e-02 0.3577064 0.2116031 0.2245812
## var 1.483859e+00 21.5366423 7.5364806 8.4892903
## std.dev 1.218138e+00 4.6407588 2.7452651 2.9136387
## coef.var 7.274973e-02 1.2681484 0.2408323 0.2518247
# crea un histograma para cada columna cuantitativa
lapply(names(base_datos_num), function(col_name) {
col <- base_datos_num[[col_name]]
ggplot(data.frame(col), aes(x = col)) +
geom_histogram(binwidth = 1, fill = "blue") +
labs(title = col_name, x = col_name, y = "Frequencia")
})
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
# crea un gráfico de densidad para cada columna cuantitativa
lapply(names(base_datos_num), function(col_name) {
col <- base_datos_num[[col_name]]
ggplot(data.frame(col), aes(x = col)) +
geom_density() +
labs(x = col_name)
})
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
# crear gráficos de barra para cada columna cuantitativa
lapply(names(base_datos_num), function(col_name) {
col <- base_datos_num[[col_name]]
ggplot(data.frame(col), aes(x = col)) +
geom_bar(stat = "count", fill = "darkred") +
labs(title = col_name, x = col_name, y = "")
})
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
# crear un dataframe con sólo las columnas de string
base_datos_str <- base_datos_clean %>% select_if(is.character)
base_datos_str
## # A tibble: 649 × 27
## school gender housing_type family_size parental_status mother_education
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Gabriel Per… Female Urban Above 3 Separated Higher Education
## 2 Gabriel Per… Female Urban Above 3 Living Together Primary School
## 3 Gabriel Per… Female Urban Up to 3 Living Together Primary School
## 4 Gabriel Per… Female Urban Above 3 Living Together Higher Education
## 5 Gabriel Per… Female Urban Above 3 Living Together High School
## 6 Gabriel Per… Male Urban Up to 3 Living Together Higher Education
## 7 Gabriel Per… Male Urban Up to 3 Living Together Lower Secondary…
## 8 Gabriel Per… Female Urban Above 3 Separated Higher Education
## 9 Gabriel Per… Male Urban Up to 3 Separated High School
## 10 Gabriel Per… Male Urban Above 3 Living Together High School
## # ℹ 639 more rows
## # ℹ 21 more variables: father_education <chr>, mother_work <chr>,
## # father_work <chr>, reason_school_choice <chr>, legal_responsibility <chr>,
## # commute_time <chr>, weekly_study_time <chr>,
## # extra_educational_support <chr>, parental_educational_support <chr>,
## # private_tutoring <chr>, extracurricular_activities <chr>,
## # attended_daycare <chr>, desire_graduate_education <chr>, …
# crear gráficos de barra para cada columna cualitativa
lapply(names(base_datos_str), function(col_name) {
col <- base_datos_str[[col_name]]
ggplot(data.frame(col), aes(x = col)) +
geom_bar(stat = "count", fill = "darkred") +
labs(title = col_name, x = col_name, y = "")
})
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
##
## [[17]]
##
## [[18]]
##
## [[19]]
##
## [[20]]
##
## [[21]]
##
## [[22]]
##
## [[23]]
##
## [[24]]
##
## [[25]]
##
## [[26]]
##
## [[27]]
# Relaciona la nota del primer semestre con la cantidad de alcohol consumida entre semana
# crear el gráfico con fondo blanco y color azul oscuro para las densidades
ggplot(base_datos_clean, aes(x = grade_1st_semester, y = alcohol_weekdays, group = alcohol_weekdays)) +
geom_density_ridges(fill = "darkblue", color = "white") +
theme_minimal() +
theme(
panel.background = element_rect(fill = "white"),
panel.grid.major = element_line(color = "grey80"),
panel.grid.minor = element_line(color = "grey90")
)
## Picking joint bandwidth of 0.747
# Relaciona la nota del primer semestre con la cantidad de alcohol consumida en fin de semana
# crear el gráfico con fondo blanco y color azul oscuro para las densidades
ggplot(base_datos_clean, aes(x = grade_1st_semester, y = alcohol_weekends, group = alcohol_weekends)) +
geom_density_ridges(fill = "darkblue", color = "white") +
theme_minimal() +
theme(
panel.background = element_rect(fill = "white", color = NA),
plot.background = element_rect(fill = "white", color = NA),
panel.grid.major = element_line(color = "grey80"),
panel.grid.minor = element_line(color = "grey90")
)
## Picking joint bandwidth of 0.822
# Relaciona la nota del segundo semestre con la cantidad de alcohol consumida entre semana
ggplot(base_datos_clean, aes(x = grade_2nd_semester, y = alcohol_weekdays, group = alcohol_weekdays)) +
geom_density_ridges(fill = "darkblue", color = "white") +
theme_minimal() +
theme(
panel.background = element_rect(fill = "white", color = NA),
plot.background = element_rect(fill = "white", color = NA),
panel.grid.major = element_line(color = "grey80"),
panel.grid.minor = element_line(color = "grey90")
)
## Picking joint bandwidth of 0.678
# Relaciona la nota del segundo semestre con la cantidad de alcohol consumida en fin de semana
ggplot(base_datos_clean, aes(x = grade_2nd_semester, y = alcohol_weekends, group = alcohol_weekends)) +
geom_density_ridges(fill = "darkblue", color = "white") +
theme_minimal() +
theme(
panel.background = element_rect(fill = "white", color = NA),
plot.background = element_rect(fill = "white", color = NA),
panel.grid.major = element_line(color = "grey80"),
panel.grid.minor = element_line(color = "grey90")
)
## Picking joint bandwidth of 0.856
# crear un mapa de calor
create_heatmap <- function(col_name) {
count_data <- base_datos_str %>% count(alcohol_weekdays, !!sym(col_name))
ggplot(count_data, aes(x = alcohol_weekdays, y = !!sym(col_name))) +
geom_tile(aes(fill = n), color = "white") +
scale_fill_gradient(low = "white", high = "darkblue") +
labs(title = paste("Comparación de alcohol entre semana con", col_name),
x = "Alcohol entre semana", y = col_name)
}
# aplicar la función a tods las columnas
heatmap_plots <- lapply(names(base_datos_str)[-which(names(base_datos_str) == "alcohol_weekdays")], create_heatmap)
print(heatmap_plots)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
##
## [[17]]
##
## [[18]]
##
## [[19]]
##
## [[20]]
##
## [[21]]
##
## [[22]]
##
## [[23]]
##
## [[24]]
##
## [[25]]
##
## [[26]]
# crear un mapa de calor
create_heatmap <- function(col_name) {
count_data <- base_datos_str %>% count(alcohol_weekends, !!sym(col_name))
ggplot(count_data, aes(x = alcohol_weekends, y = !!sym(col_name))) +
geom_tile(aes(fill = n), color = "white") +
scale_fill_gradient(low = "white", high = "darkblue") +
labs(title = paste("Comparación de alcohol en fin de semana con", col_name),
x = "Alcohol en fin de semana", y = col_name)
}
# aplicar la unción a todas las columnas
heatmap_plots <- lapply(names(base_datos_str)[-which(names(base_datos_str) == "alcohol_weekends")], create_heatmap)
print(heatmap_plots)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
##
## [[17]]
##
## [[18]]
##
## [[19]]
##
## [[20]]
##
## [[21]]
##
## [[22]]
##
## [[23]]
##
## [[24]]
##
## [[25]]
##
## [[26]]
# Crear tablas de contingencia para cada columna cualitativa y la de cantidad de alcohol entre semana
tablas_contingencias_1 <- lapply(base_datos_str, function(col) {
table(col, base_datos_str$alcohol_weekdays)
})
print(tablas_contingencias_1)
## $school
##
## col High Low Moderate
## Gabriel Pereira 22 379 22
## Mousinho da Silveira 12 193 21
##
## $gender
##
## col High Low Moderate
## Female 9 363 11
## Male 25 209 32
##
## $housing_type
##
## col High Low Moderate
## Rural 10 168 19
## Urban 24 404 24
##
## $family_size
##
## col High Low Moderate
## Above 3 23 408 26
## Up to 3 11 164 17
##
## $parental_status
##
## col High Low Moderate
## Living Together 31 501 37
## Separated 3 71 6
##
## $mother_education
##
## col High Low Moderate
## High School 8 118 13
## Higher Education 9 155 11
## Lower Secondary School 7 173 6
## None 0 5 1
## Primary School 10 121 12
##
## $father_education
##
## col High Low Moderate
## High School 5 117 9
## Higher Education 7 110 11
## Lower Secondary School 11 188 10
## None 0 7 0
## Primary School 11 150 13
##
## $mother_work
##
## col High Low Moderate
## Health 0 45 3
## Homemaker 8 119 8
## other 14 229 15
## Services 9 118 9
## Teacher 3 61 8
##
## $father_work
##
## col High Low Moderate
## Health 1 20 2
## Homemaker 0 39 3
## other 17 329 21
## Services 14 150 17
## Teacher 2 34 0
##
## $reason_school_choice
##
## col High Low Moderate
## Course Preference 13 258 14
## Near Home 10 127 12
## Other 7 56 9
## Reputation 4 131 8
##
## $legal_responsibility
##
## col High Low Moderate
## Father 8 133 12
## Mother 20 408 27
## Other 6 31 4
##
## $commute_time
##
## col High Low Moderate
## 15 to 30 min 11 189 13
## 30 min to 1h 4 42 8
## More than 1h 3 12 1
## Up to 15 min 16 329 21
##
## $weekly_study_time
##
## col High Low Moderate
## 2 to 5h 14 278 13
## 5 to 10h 2 94 1
## More than 10h 2 29 4
## Up to 2h 16 171 25
##
## $extra_educational_support
##
## col High Low Moderate
## No 30 510 41
## Yes 4 62 2
##
## $parental_educational_support
##
## col High Low Moderate
## No 12 215 24
## Yes 22 357 19
##
## $private_tutoring
##
## col High Low Moderate
## No 31 539 40
## Yes 3 33 3
##
## $extracurricular_activities
##
## col High Low Moderate
## No 14 296 24
## Yes 20 276 19
##
## $attended_daycare
##
## col High Low Moderate
## No 10 109 9
## Yes 24 463 34
##
## $desire_graduate_education
##
## col High Low Moderate
## No 8 55 6
## Yes 26 517 37
##
## $has_internet
##
## col High Low Moderate
## No 5 135 11
## Yes 29 437 32
##
## $is_dating
##
## col High Low Moderate
## No 14 364 32
## Yes 20 208 11
##
## $good_family_relationship
##
## col High Low Moderate
## Fair 6 92 3
## Good 24 440 33
## Poor 4 40 7
##
## $free_time_after_school
##
## col High Low Moderate
## High 17 203 26
## Low 7 135 10
## Moderate 10 234 7
##
## $time_with_friends
##
## col High Low Moderate
## High 23 202 26
## Low 4 182 7
## Moderate 7 188 10
##
## $alcohol_weekdays
##
## col High Low Moderate
## High 34 0 0
## Low 0 572 0
## Moderate 0 0 43
##
## $alcohol_weekends
##
## col High Low Moderate
## High 26 74 32
## Low 4 391 2
## Moderate 4 107 9
##
## $health_status
##
## col High Low Moderate
## Fair 9 110 5
## Good 19 310 28
## Poor 6 152 10
# crear una representación gráfica de las tablas de contingencia
lapply(seq_along(tablas_contingencias_1), function(i) {
mosaicplot(tablas_contingencias_1[[i]],
color = TRUE,
xlab = "Alcohol entre semana",
ylab = names(tablas_contingencias_1[[i]])[2],
main = paste("Alcohol entre Semana y", names(base_datos_str)[i][1]))
})
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
##
## [[8]]
## NULL
##
## [[9]]
## NULL
##
## [[10]]
## NULL
##
## [[11]]
## NULL
##
## [[12]]
## NULL
##
## [[13]]
## NULL
##
## [[14]]
## NULL
##
## [[15]]
## NULL
##
## [[16]]
## NULL
##
## [[17]]
## NULL
##
## [[18]]
## NULL
##
## [[19]]
## NULL
##
## [[20]]
## NULL
##
## [[21]]
## NULL
##
## [[22]]
## NULL
##
## [[23]]
## NULL
##
## [[24]]
## NULL
##
## [[25]]
## NULL
##
## [[26]]
## NULL
##
## [[27]]
## NULL
# aplicar la prueba de independencia de chi-cuadrado a cada tabla de contingencia
chi_cuadrado_1 <- lapply(tablas_contingencias_1, chisq.test)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
chi_cuadrado_1
## $school
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 4.0191, df = 2, p-value = 0.134
##
##
## $gender
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 39.436, df = 2, p-value = 2.733e-09
##
##
## $housing_type
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 4.1675, df = 2, p-value = 0.1245
##
##
## $family_size
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 2.3978, df = 2, p-value = 0.3015
##
##
## $parental_status
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 0.49529, df = 2, p-value = 0.7806
##
##
## $mother_education
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 9.3106, df = 8, p-value = 0.3168
##
##
## $father_education
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 4.1102, df = 8, p-value = 0.847
##
##
## $mother_work
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 6.1653, df = 8, p-value = 0.6287
##
##
## $father_work
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 10.683, df = 8, p-value = 0.2203
##
##
## $reason_school_choice
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 12.356, df = 6, p-value = 0.05448
##
##
## $legal_responsibility
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 9.6798, df = 4, p-value = 0.04618
##
##
## $commute_time
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 13.687, df = 6, p-value = 0.03333
##
##
## $weekly_study_time
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 23.815, df = 6, p-value = 0.0005648
##
##
## $extra_educational_support
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 1.696, df = 2, p-value = 0.4283
##
##
## $parental_educational_support
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 5.7747, df = 2, p-value = 0.05572
##
##
## $private_tutoring
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 0.60637, df = 2, p-value = 0.7385
##
##
## $extracurricular_activities
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 1.7848, df = 2, p-value = 0.4097
##
##
## $attended_daycare
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 2.2162, df = 2, p-value = 0.3302
##
##
## $desire_graduate_education
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 7.0739, df = 2, p-value = 0.0291
##
##
## $has_internet
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 1.5606, df = 2, p-value = 0.4583
##
##
## $is_dating
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 9.4615, df = 2, p-value = 0.00882
##
##
## $good_family_relationship
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 7.4854, df = 4, p-value = 0.1124
##
##
## $free_time_after_school
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 15.161, df = 4, p-value = 0.004379
##
##
## $time_with_friends
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 24.017, df = 4, p-value = 7.925e-05
##
##
## $alcohol_weekdays
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 1298, df = 4, p-value < 2.2e-16
##
##
## $alcohol_weekends
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 171.75, df = 4, p-value < 2.2e-16
##
##
## $health_status
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 4.2113, df = 4, p-value = 0.3782
# Crear tablas de contingencia para cada columna cualitativa y la de cantidad de alcohol en fines de semana
tablas_contingencias_2 <- lapply(base_datos_str, function(col) {
table(col, base_datos_str$alcohol_weekends)
})
print(tablas_contingencias_2)
## $school
##
## col High Low Moderate
## Gabriel Pereira 87 259 77
## Mousinho da Silveira 45 138 43
##
## $gender
##
## col High Low Moderate
## Female 37 275 71
## Male 95 122 49
##
## $housing_type
##
## col High Low Moderate
## Rural 39 120 38
## Urban 93 277 82
##
## $family_size
##
## col High Low Moderate
## Above 3 85 288 84
## Up to 3 47 109 36
##
## $parental_status
##
## col High Low Moderate
## Living Together 117 341 111
## Separated 15 56 9
##
## $mother_education
##
## col High Low Moderate
## High School 33 75 31
## Higher Education 35 110 30
## Lower Secondary School 28 128 30
## None 2 3 1
## Primary School 34 81 28
##
## $father_education
##
## col High Low Moderate
## High School 30 70 31
## Higher Education 29 80 19
## Lower Secondary School 39 133 37
## None 0 7 0
## Primary School 34 107 33
##
## $mother_work
##
## col High Low Moderate
## Health 10 26 12
## Homemaker 27 84 24
## other 46 168 44
## Services 31 75 30
## Teacher 18 44 10
##
## $father_work
##
## col High Low Moderate
## Health 6 17 0
## Homemaker 7 31 4
## other 71 221 75
## Services 45 99 37
## Teacher 3 29 4
##
## $reason_school_choice
##
## col High Low Moderate
## Course Preference 59 181 45
## Near Home 32 89 28
## Other 19 39 14
## Reputation 22 88 33
##
## $legal_responsibility
##
## col High Low Moderate
## Father 32 94 27
## Mother 92 280 83
## Other 8 23 10
##
## $commute_time
##
## col High Low Moderate
## 15 to 30 min 43 130 40
## 30 min to 1h 12 31 11
## More than 1h 7 9 0
## Up to 15 min 70 227 69
##
## $weekly_study_time
##
## col High Low Moderate
## 2 to 5h 52 193 60
## 5 to 10h 5 72 20
## More than 10h 6 25 4
## Up to 2h 69 107 36
##
## $extra_educational_support
##
## col High Low Moderate
## No 123 351 107
## Yes 9 46 13
##
## $parental_educational_support
##
## col High Low Moderate
## No 64 145 42
## Yes 68 252 78
##
## $private_tutoring
##
## col High Low Moderate
## No 120 375 115
## Yes 12 22 5
##
## $extracurricular_activities
##
## col High Low Moderate
## No 61 210 63
## Yes 71 187 57
##
## $attended_daycare
##
## col High Low Moderate
## No 33 71 24
## Yes 99 326 96
##
## $desire_graduate_education
##
## col High Low Moderate
## No 20 35 14
## Yes 112 362 106
##
## $has_internet
##
## col High Low Moderate
## No 27 101 23
## Yes 105 296 97
##
## $is_dating
##
## col High Low Moderate
## No 87 248 75
## Yes 45 149 45
##
## $good_family_relationship
##
## col High Low Moderate
## Fair 26 55 20
## Good 92 314 91
## Poor 14 28 9
##
## $free_time_after_school
##
## col High Low Moderate
## High 66 126 54
## Low 26 106 20
## Moderate 40 165 46
##
## $time_with_friends
##
## col High Low Moderate
## High 95 106 50
## Low 14 154 25
## Moderate 23 137 45
##
## $alcohol_weekdays
##
## col High Low Moderate
## High 26 4 4
## Low 74 391 107
## Moderate 32 2 9
##
## $alcohol_weekends
##
## col High Low Moderate
## High 132 0 0
## Low 0 397 0
## Moderate 0 0 120
##
## $health_status
##
## col High Low Moderate
## Fair 24 78 22
## Good 85 207 65
## Poor 23 112 33
# crear una representación gráfica de las tablas de contingencia
lapply(seq_along(tablas_contingencias_2), function(i) {
mosaicplot(tablas_contingencias_2[[i]],
color = TRUE,
xlab = "Alcohol Fin de Semana",
ylab = names(tablas_contingencias_2[[i]])[2],
main = paste("Alcohol Fin de Semana y", names(base_datos_str)[i][1]))
})
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
##
## [[8]]
## NULL
##
## [[9]]
## NULL
##
## [[10]]
## NULL
##
## [[11]]
## NULL
##
## [[12]]
## NULL
##
## [[13]]
## NULL
##
## [[14]]
## NULL
##
## [[15]]
## NULL
##
## [[16]]
## NULL
##
## [[17]]
## NULL
##
## [[18]]
## NULL
##
## [[19]]
## NULL
##
## [[20]]
## NULL
##
## [[21]]
## NULL
##
## [[22]]
## NULL
##
## [[23]]
## NULL
##
## [[24]]
## NULL
##
## [[25]]
## NULL
##
## [[26]]
## NULL
##
## [[27]]
## NULL
Se escogió el nivel estándar de significancia donde alpha = 5, esto quiere decir que si el p-valor es menor a 5%, entonces, la probabilidad de que esas dos variables sean independientes es muy baja, por lo que se rechaza H_0.
# aplicar la prueba de independencia de chi-cuadrado a cada tabla de contingencia
chi_cuadrado_2 <- lapply(tablas_contingencias_2, chisq.test)
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
chi_cuadrado_2
## $school
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 0.085819, df = 2, p-value = 0.958
##
##
## $gender
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 69.654, df = 2, p-value = 7.495e-16
##
##
## $housing_type
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 0.14167, df = 2, p-value = 0.9316
##
##
## $family_size
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 3.171, df = 2, p-value = 0.2049
##
##
## $parental_status
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 3.8628, df = 2, p-value = 0.1449
##
##
## $mother_education
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 10.255, df = 8, p-value = 0.2476
##
##
## $father_education
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 9.9856, df = 8, p-value = 0.266
##
##
## $mother_work
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 7.0431, df = 8, p-value = 0.532
##
##
## $father_work
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 18.679, df = 8, p-value = 0.01668
##
##
## $reason_school_choice
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 6.8145, df = 6, p-value = 0.3383
##
##
## $legal_responsibility
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 1.0722, df = 4, p-value = 0.8987
##
##
## $commute_time
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 8.0027, df = 6, p-value = 0.2379
##
##
## $weekly_study_time
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 37.497, df = 6, p-value = 1.409e-06
##
##
## $extra_educational_support
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 2.4215, df = 2, p-value = 0.298
##
##
## $parental_educational_support
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 6.8137, df = 2, p-value = 0.03314
##
##
## $private_tutoring
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 3.0945, df = 2, p-value = 0.2128
##
##
## $extracurricular_activities
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 1.8354, df = 2, p-value = 0.3994
##
##
## $attended_daycare
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 3.1753, df = 2, p-value = 0.2044
##
##
## $desire_graduate_education
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 4.3507, df = 2, p-value = 0.1136
##
##
## $has_internet
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 2.7657, df = 2, p-value = 0.2509
##
##
## $is_dating
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 0.53281, df = 2, p-value = 0.7661
##
##
## $good_family_relationship
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 5.057, df = 4, p-value = 0.2815
##
##
## $free_time_after_school
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 18.865, df = 4, p-value = 0.0008356
##
##
## $time_with_friends
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 94.004, df = 4, p-value < 2.2e-16
##
##
## $alcohol_weekdays
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 171.75, df = 4, p-value < 2.2e-16
##
##
## $alcohol_weekends
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 1298, df = 4, p-value < 2.2e-16
##
##
## $health_status
##
## Pearson's Chi-squared test
##
## data: X[[i]]
## X-squared = 7.4814, df = 4, p-value = 0.1125
Este modelo permite responder parcialmente o totalmente la pregunta de investigación debido a que brinda información sobre qué factores poseen una mayor influencia en el consumo de alcohol en estudiantes adolescentes. Para poder interpretar los resultados, se puede observar el output que se generó al comparar las columnas alcohol_weekday y alcohol_weekend con el resto de las variables
# asegurarse que los datos sean factores
base_datos_str[] <- lapply(base_datos_str, as.factor)
# generar la V de Cramer para cada columna con alcohol_weekdays
v_cramer_entre_semana <- sapply(base_datos_str, function(col) {
cramerV(base_datos_str$alcohol_weekdays, col, ci = FALSE, conf = 0.95, type = "perc", R = 1000, histogram = FALSE, digits = 4, bias.correct = FALSE, reportIncomplete = FALSE, verbose = FALSE, tolerance = 1e-16)
})
print(v_cramer_entre_semana)
## school.Cramer V gender.Cramer V
## 0.07869 0.24650
## housing_type.Cramer V family_size.Cramer V
## 0.08013 0.06078
## parental_status.Cramer V mother_education.Cramer V
## 0.02763 0.08469
## father_education.Cramer V mother_work.Cramer V
## 0.05627 0.06892
## father_work.Cramer V reason_school_choice.Cramer V
## 0.09072 0.09757
## legal_responsibility.Cramer V commute_time.Cramer V
## 0.08636 0.10270
## weekly_study_time.Cramer V extra_educational_support.Cramer V
## 0.13550 0.05112
## parental_educational_support.Cramer V private_tutoring.Cramer V
## 0.09433 0.03057
## extracurricular_activities.Cramer V attended_daycare.Cramer V
## 0.05244 0.05844
## desire_graduate_education.Cramer V has_internet.Cramer V
## 0.10440 0.04904
## is_dating.Cramer V good_family_relationship.Cramer V
## 0.12070 0.07594
## free_time_after_school.Cramer V time_with_friends.Cramer V
## 0.10810 0.13600
## alcohol_weekdays.Cramer V alcohol_weekends.Cramer V
## 1.00000 0.36380
## health_status.Cramer V
## 0.05696
# generar la V de Cramer para cada columna con alcohol_weekends
v_cramer_fin_semana <- sapply(base_datos_str, function(col) {
cramerV(base_datos_str$alcohol_weekends, col, ci = FALSE, conf = 0.95, type = "perc", R = 1000, histogram = FALSE, digits = 4, bias.correct = FALSE, reportIncomplete = FALSE, verbose = FALSE, tolerance = 1e-16)
})
print(v_cramer_fin_semana)
## school.Cramer V gender.Cramer V
## 0.01150 0.32760
## housing_type.Cramer V family_size.Cramer V
## 0.01477 0.06990
## parental_status.Cramer V mother_education.Cramer V
## 0.07715 0.08889
## father_education.Cramer V mother_work.Cramer V
## 0.08771 0.07366
## father_work.Cramer V reason_school_choice.Cramer V
## 0.12000 0.07246
## legal_responsibility.Cramer V commute_time.Cramer V
## 0.02874 0.07852
## weekly_study_time.Cramer V extra_educational_support.Cramer V
## 0.17000 0.06108
## parental_educational_support.Cramer V private_tutoring.Cramer V
## 0.10250 0.06905
## extracurricular_activities.Cramer V attended_daycare.Cramer V
## 0.05318 0.06995
## desire_graduate_education.Cramer V has_internet.Cramer V
## 0.08188 0.06528
## is_dating.Cramer V good_family_relationship.Cramer V
## 0.02865 0.06242
## free_time_after_school.Cramer V time_with_friends.Cramer V
## 0.12060 0.26910
## alcohol_weekdays.Cramer V alcohol_weekends.Cramer V
## 0.36380 1.00000
## health_status.Cramer V
## 0.07592